Electricity Bills Analysis - ENCEVI 2018
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 516468 27.6 1160092 62 NA 631060 33.8
## Vcells 963671 7.4 8388608 64 16384 1761712 13.5
##Load and attach add-on packages
UsePackage('ProjectTemplate')
UsePackage('stringr')
UsePackage('psych')
UsePackage('ggplot2')
UsePackage('dplyr')
UsePackage('reshape2')
UsePackage('plotly')
UsePackage('processx')
UsePackage('doBy')
UsePackage('foreign')
UsePackage('survey')
UsePackage('reticulate')
UsePackage('tidyverse')
UsePackage('broman') #https://kbroman.org/knitr_knutshell/pages/Rmarkdown.html
UsePackage("rmdformats")
UsePackage('remotes')
UsePackage('summarytools')
UsePackage('DT')
UsePackage('Hmisc')
<style type="text/css">
img { background-color:transparent; border:0; } table.st-table > thead > tr { background-color: #eeeeee; } table.st-table th { text-align: center; } table.st-table td span { display: block; } .st-container { width: 100%; padding-right: 15px; padding-left: 15px; margin-right: auto; margin-left: auto; margin-top: 15px; } .st-multiline { white-space: pre; } .st-table { width: auto; table-layout: auto; margin-top: 20px; margin-bottom: 20px; max-width: 100%; background-color: transparent; border-collapse: collapse; } .st-table > thead > tr > th, .st-table > tbody > tr > th, .st-table > tfoot > tr > th, .st-table > thead > tr > td, .st-table > tbody > tr > td, .st-table > tfoot > tr > td { vertical-align: middle; } .st-table-bordered { border: 1px solid #bbbbbb; } .st-table-bordered > thead > tr > th, .st-table-bordered > tbody > tr > th, .st-table-bordered > tfoot > tr > th, .st-table-bordered > thead > tr > td, .st-table-bordered > tbody > tr > td, .st-table-bordered > tfoot > tr > td { border: 1px solid #cccccc; } .st-table-bordered > thead > tr > th, .st-table-bordered > thead > tr > td, .st-table thead > tr > th { border-bottom: none; } .st-table td, .st-table th { padding: 8px; } .st-table > thead > tr { background-color: #eeeeee; } .st-table-bordered > thead > tr > th, .st-table-bordered > tbody > tr > th, .st-table-bordered > thead > tr > td, .st-table-bordered > tbody > tr > td { border: 1px solid #cccccc; } .st-table-striped > tbody > tr:nth-of-type(odd) { background-color: #ffffff; } .st-table-striped > tbody > tr:nth-of-type(even) { background-color: #f8f8f8; } .st-table-striped > tbody > tr:nth-of-type(odd) { background-color: #ffffff; } .st-table-striped > tbody > tr:nth-of-type(even) { background-color: #f9f9f9; } .st-descr-table > thead > tr > th, .st-descr-table > tbody > tr > th, .st-descr-table > tfoot > tr > th, .st-descr-table > thead > tr > td, .st-descr-table > tbody > tr > td, .st-descr-table > tfoot > tr > td { padding-left: 24px; padding-right: 
24px; word-wrap: break-word; } .st-descr-table td { text-align: right; } .st-freq-table, .st-freq-table-nomiss, .st-cross-table { border: medium none; } .st-freq-table > thead > tr:nth-child(1) > th:nth-child(1), .st-cross-table > thead > tr:nth-child(1) > th:nth-child(1), .st-cross-table > thead > tr:nth-child(1) > th:nth-child(3) { border:none; background-color: #ffffff; text-align: center; } .st-freq-table > thead > tr > th, .st-freq-table > tbody > tr > th, .st-freq-table > tfoot > tr > th, .st-freq-table > thead > tr > td, .st-freq-table > tbody > tr > td, .st-freq-table > tfoot > tr > td, .st-freq-table-nomiss > thead > tr > th, .st-freq-table-nomiss > tbody > tr > th, .st-freq-table-nomiss > tfoot > tr > th, .st-freq-table-nomiss > thead > tr > td, .st-freq-table-nomiss > tbody > tr > td, .st-freq-table-nomiss > tfoot > tr > td, .st-cross-table > thead > tr > th, .st-cross-table > tbody > tr > th, .st-cross-table > tfoot > tr > th, .st-cross-table > thead > tr > td, .st-cross-table > tbody > tr > td, .st-cross-table > tfoot > tr > td { padding-left: 20px; padding-right: 20px; } .st-protect-top-border { border-top:1px solid #cccccc !important; } .st-cross-table td { text-align: center; } .st-small { font-size: 13px; } .st-small td, .st-small th { padding: 8px; } .st-small > thead > tr > th, .st-small > tbody > tr > th, .st-small > tfoot > tr > th, .st-small > thead > tr > td, .st-small > tbody > tr > td, .st-small > tfoot > tr > td { padding-left: 12px; padding-right: 12px; } </style>
Loading datasets
# Load the input tables ----
# Main final uses of energy in the dwellings, according to the socioeconomic
# characteristics of the households.
df.encevi <- read.csv("input/encevi.csv")
# Characteristics of the dwellings inhabited by the members of the
# households surveyed.
df.dwelling <- read.csv("input/vivienda.csv")
# Characteristics of the households that inhabit the dwellings.
df.household <- read.csv("input/hogar.csv")
# Summer months per region.
df.summer.months <- read.csv("input/summer_by_state_cfe_052419.csv")
# Get municipality codes ordered and cleaned; results are stored in
# folder "/input" as INEGI_agem_short.csv. These data were provided
# by INEGI and are related to the ENCEVI 2018 survey.
#source('municipality_codes_inegi.R')
# Get tariffs and municipality codes by household id; the results are
# stored in folder "/input" as agem_tariff_byfolio.csv.
# The script is sourced first and the file it is said to write is read next.
source("merge_ids_agem_tariffs.R")
df.tariffs <- read.csv("input/agem_tariff_byfolio.csv")
Cleaning and merging dataframes
# Lower-case the column names of the three survey tables.
names(df.encevi) <- tolower(colnames(df.encevi))
names(df.dwelling) <- tolower(colnames(df.dwelling))
names(df.household) <- tolower(colnames(df.household))
# Rename the first column of every table to "folio" (the merge key used below).
# Original note: this piece of code corrects a bug in the command.
colnames(df.encevi)[1] <- "folio"
colnames(df.dwelling)[1] <- "folio"
colnames(df.household)[1] <- "folio"
colnames(df.dwelling)[colnames(df.dwelling) == "entidad"] <- "state.id"
# Merging ENCEVI and Dwelling datasets
df.enc.dwell <- merge(df.encevi, df.dwelling, by = "folio")
# Merging encevi-dwelling dataframe with INEGI municipality codes dataframe
df.enc.dwell <- merge(df.enc.dwell, df.tariffs, by = "folio")
# Labelled factor versions of the numeric region, tariff and state codes.
df.enc.dwell$region.f <- factor(df.enc.dwell$region,
                                levels = c(1, 2, 3),
                                labels = c("Extreme hot", "Temperate", "Tropical"))
df.enc.dwell$tipo_tarif.f <- factor(df.enc.dwell$tipo_tarif,
                                    levels = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
                                    labels = c("2", "1", "1A", "1B", "1C", "1D",
                                               "1E", "1F", "DAC", "Don't know"))
df.enc.dwell$state.f <- factor(df.enc.dwell$state.id,
                               levels = as.character(1:32),
                               labels = c("Aguascalientes", "Baja California",
                                          "Baja California Sur", "Campeche",
                                          "Coahuila", "Colima", "Chiapas",
                                          "Chihuahua", "Mexico City",
                                          "Durango", "Guanajuato", "Guerrero",
                                          "Hidalgo", "Jalisco", "Mexico",
                                          "Michoacan", "Morelos", "Nayarit",
                                          "NuevoLeon", "Oaxaca", "Puebla",
                                          "Queretaro", "Quintana Roo",
                                          "San Luis Potosi", "Sinaloa",
                                          "Sonora", "Tabasco", "Tamaulipas",
                                          "Tlaxcala", "Veracruz", "Yucatan",
                                          "Zacatecas"))
# Keep only the 2017 rows of the summer-months table.
df.summer.months <- subset(df.summer.months, year == 2017,
                           select = c('state.id', 'month', 'summer'))
#df.enc.dwell <- merge(df.enc.dwell, df.summer.months, by="state.id")
# Constant 1 per record, so that weighted totals of it count dwellings.
df.enc.dwell$dummy.house <- 1
Descriptive Analysis
Dwellings surveyed per region
# Survey design construction (mmc); this computes the statistical errors.
df.enc.dwell$dummy.house <- 1
mmc <- svydesign(id = ~upm, strata = ~est_dis,
                 data = df.enc.dwell, weights = ~factor_sem)
# Estimated total of inhabited private dwellings
tb.dwell <- svytotal(df.enc.dwell$dummy.house, mmc)
# Estimated total of inhabited private dwellings by region
tb.dwell.region <- svyby(~dummy.house, by = ~region.f, mmc, svytotal)
ea01n <- tb.dwell[[1]]                       # Point estimate
er01n <- t(tb.dwell.region[2])               # Point estimate by region
ea01e <- SE(tb.dwell)                        # Standard error
er01e <- t(data.frame(SE(tb.dwell.region)))  # Standard error by region
# Rename the columns for display. Note that any later code reading
# tb.dwell.region sees "region" / "dwellings", not "region.f" / "dummy.house".
colnames(tb.dwell.region)[colnames(tb.dwell.region) == "dummy.house"] <- "dwellings"
colnames(tb.dwell.region)[colnames(tb.dwell.region) == "region.f"] <- "region"
tb.dwell <- as.data.frame(tb.dwell)
tb.dwell.region$se <- round(tb.dwell.region$se, 3)
datatable(tb.dwell.region, rownames = FALSE,
          options = list(
            dom = 't',
            scrollX = TRUE,
            fixedColumns = TRUE
          ))
- Number of dwellings represented by the survey: 33,162,148, standard error: 151,533.1
Dwellings connected to the grid
# Flag dwellings connected to the grid: 1 when electri is "1", otherwise 0
# (missing values of electri therefore count as not connected).
df.enc.dwell$grid <- ifelse(df.enc.dwell$electri %in% "1", 1, 0)
# Weighted total of dwellings connected to the grid
total.dwell.grid <- sum(df.enc.dwell$grid * df.enc.dwell$factor_sem)
#format(total.dwell.grid, big.mark=",", small.interval=3)
- Number of dwellings connected to the grid: 32,808,508*
# Weighted frequency table of the grid flag; the argument is spelled out as
# `weights` instead of relying on partial matching of `weight`.
freq(df.enc.dwell$grid, weights = df.enc.dwell$factor_sem, report.nas = FALSE,
     plain.ascii = FALSE, style = "simple", method = "render",
     headings = FALSE, na.rm = TRUE)
| | Freq | % | % Cum. |
|---|---|---|---|
| 0 | 353640.00 | 1.07 | 1.07 |
| 1 | 32808508.00 | 98.93 | 100.00 |
| Total | 33162148.00 | 100.00 | 100.00 |
Dwellings connected to the grid by region
Number of dwellings connected to the grid by region
# Survey design rebuilt so that it includes the `grid` column of df.enc.dwell.
mmc <- svydesign(id = ~upm, data = df.enc.dwell, strata = ~est_dis,
                 weights = ~factor_sem)
# Weighted counts by region and grid flag, side by side with their share of
# the overall total (prop.table over the whole table, i.e. proportions 0-1).
tb.grid.region <- cbind(svytable(~region.f + grid, design = mmc),
                        prop.table(svytable(~region.f + grid, design = mmc)))
tb.grid.region <- as.data.frame(round(tb.grid.region, 4))
#colnames(tb.grid.region)[colnames(tb.grid.region)=="region.f"] <- "region"
# Printing table: reorder to count/share for grid = 0, then count/share for
# grid = 1, and append a row of column sums.
tb.grid.region <- cbind(tb.grid.region[1], tb.grid.region[3],
                        tb.grid.region[2], tb.grid.region[4])
tb.grid.region <- rbind(tb.grid.region, colSums(tb.grid.region))
colnames(tb.grid.region) <- c("dwellings (no grid)", "percent (no grid)",
                              "dwellings (grid)", "percent (grid)")
# The appended row is labelled "4" by rbind; relabel it as "Total".
tb.rownames <- rownames(tb.grid.region)
rownames(tb.grid.region) <- replace(tb.rownames, tb.rownames == 4, "Total")
#rownames(tb.grid.region) <- tb.rownames
datatable(tb.grid.region, rownames = TRUE,
          options = list(
            dom = 't',
            scrollX = TRUE,
            fixedColumns = TRUE
          ))
# Estimating number of dwellings connected to the grid by region.
# The design `mmc` built at the top of this block is reused; df.enc.dwell has
# not changed since, so rebuilding it would give the same design.
tb.grid.region <- svyby(~dummy.house, by = ~region.f + grid, mmc, svytotal)
colnames(tb.grid.region)[colnames(tb.grid.region) == "dummy.house"] <- "dwellings"
# Regional totals to divide by. tb.dwell.region had its columns renamed to
# "region" / "dwellings" when it was printed, so selecting "region.f" and
# "dummy.house" by name fails with "undefined columns selected". Its first two
# columns (grouping variable, then estimate) are taken by position instead.
region.totals <- as.data.frame(tb.dwell.region)[, 1:2]
colnames(region.totals) <- c("region.f", "dwellings.region")
tb.grid.region <- merge(tb.grid.region, region.totals, by = "region.f")
# Share of each region's dwellings in each grid category.
tb.grid.region <- transform(tb.grid.region,
                            percent.grid = myround(tb.grid.region$dwellings /
                                                     tb.grid.region$dwellings.region, 3))
tb.grid.region$se <- myround(tb.grid.region$se, 3)
## Printing table
datatable(tb.grid.region, rownames = FALSE,
          options = list(
            dom = 't',
            scrollX = TRUE,
            fixedColumns = TRUE
          ))
Electricity Bill Dates
# Billing period (start date, end date, number of days) of the first bill.
# The two date columns are created first so that the column order of
# df.enc.dwell stays: ini date, end date, years, days.
df.enc.dwell$bill.ini.date1 <- NA
df.enc.dwell$bill.end.date1 <- NA
# The data was obtained from January 2018 to June 2018.
# So the bills could be from 2017 or 2018. As the survey does not report
# the year of the electricity bill, it is calculated here.
df.enc.dwell$year_ini1 <- "2018"
df.enc.dwell$year_end1 <- "2018"
# Case 1. If the initial period of the bill is from June to December,
# it is assumed that the year of the initial period is 2017
df.enc.dwell$year_ini1[df.enc.dwell$mes_inic1 >= 6] <- 2017
# Case 2. If the final period of the bill is from July to December,
# it is assumed that the year of the final period is 2017
df.enc.dwell$year_end1[df.enc.dwell$mes_final1 >= 7] <- 2017
# Case 3. If the initial month of the bill is higher than the final month,
# it is assumed that the year of the initial period is 2017
df.enc.dwell$year_ini1[df.enc.dwell$mes_inic1 > df.enc.dwell$mes_final1] <- 2017
# Build "month-day-year" strings (any blanks removed) and parse them as dates;
# strings that do not parse, e.g. with a missing month or day, become NA.
df.enc.dwell$bill.ini.date1 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_inic1, df.enc.dwell$inicia1,
                        df.enc.dwell$year_ini1, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.end.date1 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_final1, df.enc.dwell$final1,
                        df.enc.dwell$year_end1, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.days1 <- as.integer(df.enc.dwell$bill.end.date1 -
                                        df.enc.dwell$bill.ini.date1)
# After obtaining the number of days of the period in the electricity bill,
# there are still some special cases that are corrected here.
# Case 4. If the period in the electricity bill is one year or longer, the
# initial year is assumed to be 2018
df.enc.dwell$year_ini1[df.enc.dwell$bill.days1 >= 365 &
                         (df.enc.dwell$mes_inic1 <= df.enc.dwell$mes_final1)] <- 2018
# Case 5. If the period in the electricity bill is negative, the initial
# year is assumed to be 2017
df.enc.dwell$year_ini1[df.enc.dwell$bill.days1 < 0] <- 2017
# Only year_ini1 is touched by cases 4 and 5, so only the start date needs to
# be rebuilt; the end date computed above is still valid.
df.enc.dwell$bill.ini.date1 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_inic1, df.enc.dwell$inicia1,
                        df.enc.dwell$year_ini1, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.days1 <- as.integer(df.enc.dwell$bill.end.date1 -
                                        df.enc.dwell$bill.ini.date1)
# Histogram (as probabilities) of the billed days, x axis limited to 0-350.
pl.hist.bill.days1 <- plot_ly(x = df.enc.dwell$bill.days1,
                              type = "histogram",
                              histnorm = "probability") %>%
  layout(title = 'Distribution of Days Billed by CFE',
         xaxis = list(title = "days", range = c(0, 350)),
         yaxis = list(title = "probability"))
# Display the histogram of billed days.
pl.hist.bill.days1
# Unweighted descriptive statistics of the billed days of the first bill.
# These two statements were fused into an assignment to a variable named
# `pl.hist.bill.days1stat.bill.days1`, leaving stat.bill.days1 undefined.
stat.bill.days1 <- descr(df.enc.dwell$bill.days1, style = "rmarkdown", transpose = TRUE,
                         stats = c("mean", "med", "sd", "min", "max", "n.valid", "pct.valid"),
                         headings = TRUE)
# Copy of "% Valid" under a name that needs no backticks.
stat.bill.days1$percent.valid <- stat.bill.days1$`% Valid`
stat.bill.days1
### Descriptive Statistics
#### df.enc.dwell$bill.days1
**N:** 28953
| | Mean | Median | Std.Dev. | Min | Max | N.Valid | % Valid |
|---------------:|------:|-------:|---------:|-------:|-------:|---------:|--------:|
| **bill.days1** | 56.85 | 61.00 | 16.64 | -61.00 | 364.00 | 11231.00 | 38.79 |
Table: Table continues below
| | percent.valid |
|---------------:|--------------:|
| **bill.days1** | 38.79 |
- Only 11,231 of the households surveyed (38.79%) provided information about their bills.
The Federal Electricity Commission (CFE) bills its residential clients on a monthly or bi-monthly basis.
The billing period for monthly accounts is 27-35 days, and the billing period for bi-monthly accounts is 56-65 days. So, in this analysis we discarded all the bills outside these two ranges. Bills that are outside these ranges usually include special fees, such as late fees or installation fees.
Each bill includes a 16% value-added tax (known as IVA) and an 8% tax (DAP) that is used to pay for the public street lighting system in Mexico.
A user with high consumption also pays a fixed monthly fee, which is different for each tariff.
# Billed days of the first bill, kept only when they fall inside the monthly
# (27-35 days) or bi-monthly (56-65 days) range; every other value,
# including missing ones, is NA.
df.enc.dwell$bill.days1.valid <- df.enc.dwell$bill.days1
df.enc.dwell$bill.days1.valid[
  !(df.enc.dwell$bill.days1.valid %in% c(27:35, 56:65))
] <- NA
# Unweighted descriptive statistics of the retained billing periods.
stat.bill.days1.valid <- descr(
  df.enc.dwell$bill.days1.valid,
  style = "rmarkdown",
  transpose = TRUE,
  stats = c("mean", "med", "sd", "min", "max", "n.valid", "pct.valid"),
  headings = TRUE
)
print(stat.bill.days1.valid,
      footnote = "<b>Source:</b> INEGI, 2019<br/><i>ENCEVI 2018:</i>")
### Descriptive Statistics
#### df.enc.dwell$bill.days1.valid
**N:** 28953
| | Mean | Median | Std.Dev. | Min | Max | N.Valid | % Valid |
|---------------------:|------:|-------:|---------:|------:|------:|---------:|--------:|
| **bill.days1.valid** | 56.16 | 61.00 | 11.02 | 27.00 | 65.00 | 10676.00 | 36.87 |
# Survey design used for the weighted estimates below.
svd.dwell <- svydesign(id = ~upm, strata = ~est_dis,
                       data = df.enc.dwell, weights = ~factor_sem)
# Weighted mean, variance and quartiles of the billed days of the first bill.
# NOTE(review): `mean`, `variance` and `quantiles` are global names (the first
# shares its name with base::mean); they are kept because code outside this
# section may read them.
mean <- svymean(~bill.days1, svd.dwell, na.rm = TRUE)
variance <- svyvar(~bill.days1, svd.dwell, na.rm = TRUE)
#svytotal(~bill.days1, svd.dwell, na.rm=TRUE)
quantiles <- svyquantile(~bill.days1, svd.dwell, c(.25, .5, .75),
                         ci = TRUE, na.rm = TRUE)
mean
variance
quantiles
# Standard deviation: square root of the estimated variance.
sqrt(as.matrix(variance)[, 1])
Calculating days of consumption of 2nd electricity bill (users that pay two electricity bills each period)
# Billing period (start date, end date, number of days) of the second bill.
# The two date columns are created first so that the column order of
# df.enc.dwell stays: ini date, end date, years, days.
df.enc.dwell$bill.ini.date2 <- NA
df.enc.dwell$bill.end.date2 <- NA
# The data was obtained from January 2018 to June 2018.
# So the bills could be from 2017 or 2018. As the survey does not report
# the year of the electricity bill, it is calculated here.
df.enc.dwell$year_ini2 <- "2018"
df.enc.dwell$year_end2 <- "2018"
# Case 1. If the initial period of the bill is from June to December,
# it is assumed that the year of the initial period is 2017
df.enc.dwell$year_ini2[df.enc.dwell$mes_inic2 >= 6] <- 2017
# Case 2. If the final period of the bill is from July to December,
# it is assumed that the year of the final period is 2017
df.enc.dwell$year_end2[df.enc.dwell$mes_final2 >= 7] <- 2017
# Case 3. If the initial month of the bill is higher than the final month,
# it is assumed that the year of the initial period is 2017
df.enc.dwell$year_ini2[df.enc.dwell$mes_inic2 > df.enc.dwell$mes_final2] <- 2017
# Build "month-day-year" strings (any blanks removed) and parse them as dates;
# strings that do not parse, e.g. with a missing month or day, become NA.
df.enc.dwell$bill.ini.date2 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_inic2, df.enc.dwell$inicia2,
                        df.enc.dwell$year_ini2, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.end.date2 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_final2, df.enc.dwell$final2,
                        df.enc.dwell$year_end2, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.days2 <- as.integer(df.enc.dwell$bill.end.date2 -
                                        df.enc.dwell$bill.ini.date2)
# After obtaining the number of days of the period in the electricity bill,
# there are still some special cases that are corrected here.
# Case 4. If the period in the electricity bill is one year or longer, the
# initial year is assumed to be 2018
df.enc.dwell$year_ini2[df.enc.dwell$bill.days2 >= 365 &
                         (df.enc.dwell$mes_inic2 <= df.enc.dwell$mes_final2)] <- 2018
# Case 5. If the period in the electricity bill is negative, the initial
# year is assumed to be 2017
df.enc.dwell$year_ini2[df.enc.dwell$bill.days2 < 0] <- 2017
# Only year_ini2 is touched by cases 4 and 5, so only the start date needs to
# be rebuilt; the end date computed above is still valid.
df.enc.dwell$bill.ini.date2 <- as.Date(
  str_replace_all(paste(df.enc.dwell$mes_inic2, df.enc.dwell$inicia2,
                        df.enc.dwell$year_ini2, sep = "-"),
                  pattern = " ", replacement = ""),
  format = "%m-%d-%Y")
df.enc.dwell$bill.days2 <- as.integer(df.enc.dwell$bill.end.date2 -
                                        df.enc.dwell$bill.ini.date2)
# Unweighted descriptive statistics of the billed days of the second bill.
stat.bill.days2 <- descr(df.enc.dwell$bill.days2, style = "rmarkdown", transpose = TRUE,
                         stats = c("mean", "med", "sd", "min", "max", "n.valid", "pct.valid"),
                         headings = TRUE)
print(stat.bill.days2,
      footnote = "<b>Source:</b> INEGI, 2019<br/><i>ENCEVI 2018:</i>")
Descriptive Statistics
df.enc.dwell$bill.days2
N: 28953
| | Mean | Median | Std.Dev. | Min | Max | N.Valid | % Valid |
|---|---|---|---|---|---|---|---|
| bill.days2 | 55.46 | 60.00 | 14.35 | 0.00 | 92.00 | 106.00 | 0.37 |
# Billed days of the second bill, kept only when they fall inside the monthly
# (27-35 days) or bi-monthly (56-65 days) range. The monthly upper bound was
# 33 here, while the filter on the first bill and the accompanying text use
# 35; both bills now use the same ranges.
df.enc.dwell$bill.days2.valid <- df.enc.dwell$bill.days2
df.enc.dwell$bill.days2.valid[df.enc.dwell$bill.days2.valid < 27] <- NA
df.enc.dwell$bill.days2.valid[df.enc.dwell$bill.days2.valid > 65] <- NA
df.enc.dwell$bill.days2.valid[df.enc.dwell$bill.days2.valid > 35 &
                                df.enc.dwell$bill.days2.valid < 56] <- NA
# Unweighted descriptive statistics of the retained billing periods.
stat.bill.days2.valid <- descr(df.enc.dwell$bill.days2.valid, style = "rmarkdown", transpose = TRUE,
                               stats = c("mean", "med", "sd", "min", "max", "n.valid", "pct.valid"),
                               headings = TRUE)
print(stat.bill.days2.valid,
      footnote = "<b>Source:</b> INEGI, 2019<br/><i>ENCEVI 2018:</i>")
### Descriptive Statistics
#### df.enc.dwell$bill.days2.valid
**N:** 28953
| | Mean | Median | Std.Dev. | Min | Max | N.Valid | % Valid |
|---------------------:|------:|-------:|---------:|------:|------:|--------:|--------:|
| **bill.days2.valid** | 55.52 | 60.00 | 11.35 | 28.00 | 65.00 | 97.00 | 0.34 |
library('psych')
# Weighted description of the billed days of the first bill.
# `weights`, `exclude.missing` and `digits` are Hmisc::describe arguments, and
# psych also exports a `describe`; the call is namespace-qualified so it does
# not depend on which package was attached last.
Hmisc::describe(df.enc.dwell$bill.days1, weights = df.enc.dwell$factor_sem,
                exclude.missing = TRUE, digits = 3)
For this analysis, the records of dwellings that are not connected to the grid are not considered. There are other factors that will be considered later in the analysis of tariffs, such as users that have a small business in their properties
Weighted Frequencies
df.enc.dwell$local_com
Weights: factor_sem
| | Freq | % | % Cum. |
|---|---|---|---|
| 1 | 2046003.00 | 6.24 | 6.24 |
| 2 | 30762505.00 | 93.76 | 100.00 |
| Total | 32808508.00 | 100.00 | 100.00 |
Weighted Frequencies
df.enc.dwell$elect_loc
Weights: factor_sem
| | Freq | % | % Cum. |
|---|---|---|---|
| 1 | 844975.00 | 41.30 | 41.30 |
| 2 | 1201028.00 | 58.70 | 100.00 |
| Total | 2046003.00 | 100.00 | 100.00 |
df.enc.dwell$region.tariff
# Bill-related columns of df.enc.dwell: household and municipality ids, grid
# flag, billed days and period dates of both bills, plus the consumption,
# business and tariff fields.
df.bill <- df.enc.dwell[c('folio', 'agem', 'grid',
                          'bill.days1', 'bill.days2',
                          'bill.ini.date1', 'bill.ini.date2',
                          'bill.end.date1', 'bill.end.date2',
                          'cons_med1', 'cons_med2',
                          'cond_energ', 'local_com', 'elect_loc',
                          'tariff', 'tipo_tarif.f', 'region.tariff')]
#sort(df.bill.error$bill.days1,decreasing=FALSE)
# Print the records from the longest to the shortest first-bill period.
df.bill[order(-df.bill[["bill.days1"]]), ]